Compute Delay models

This notebook is a fork of "AnkleSLIP - find minimal model". Here, the relative remaining variance for several delay coordinates is computed to establish the claim that no information from the past is used.

  • Dec 10th, 2013 MM: notebook forked
  • Jan 8th, 2014 MM: compressed to 1 panel, all subjects

In [1]:
%cd /home/moritz/mmnotebooks


/qnap/pylibs_users/mm/mmnotebooks

In [2]:
#%%capture
%config InlineBackend.close_figures = False

# this does actually all the work
import mutils.misc as mi
import libshai.util as ut
import sys


nbfile_base = "AnkleSLIP - find minimal model- Version 3.ipynb"
nbfile_this = "AnkleSLIP - compute Delay Models.ipynb"

mi.run_nbcells(nbfile_base, ['0']) # get basic configuration
conf.startup_n_full_maps = 10
conf.quiet = True

# prepare plot
mi.run_nbcells(nbfile_this, ['prepareFigure',])
               
for subj in [1,2,3,7]:
    conf.subject = subj

    # load data
    mi.run_nbcells(nbfile_base, ['0.1','1']) #load data
    frames = arange(ws1.dataset_full.all_IC_r.shape[0])

    # compute prediction and plot
    mi.run_nbcells(nbfile_this, ['prepareData', 'computePredictions','addToAxes',])
    
mi.run_nbcells(nbfile_this, ['finalizeFigure',])


cslip_forceZeroRef         bool  True       reference values for controlled SLIP maps must be zero or not
dt_medfilter               bool  False      
dt_window                  int  30          
exclude_IC_from_factors    bool  False      
n_factors                  int  5           how many (optimal) factors to select of the full kinematic state
normalize_m                bool  True       
po_average_over_IC         bool  True       average over IC's and T,ymin (alt: parameters) for reference SLIP
quiet                      bool  False      
select_ankle_SLIP          bool  True       
startup_compute_PCA        bool  False      
startup_compute_full_maps  bool  True       
startup_n_full_maps        int  20          
subject                    int  2           
ttype                      int  1           
computing right step done
computing right stride done
computing left step done
computing left stride done
computing right step done
computing right stride done
computing left step done
computing left stride done
computing right step done
computing right stride done
computing left step done
computing left stride done
computing right step done
computing right stride done
computing left step done
computing left stride done

In [6]:


In [3]:
# suppressing output is no longer necessary
#%%capture
%config InlineBackend.close_figures = False
import mutils.misc as mi
import libshai.util as ut
import sys

#cell_ID 0

nbfile = "AnkleSLIP - find minimal model- Version 3.ipynb"


mi.run_nbcells(nbfile, ['0']) # get basic configuration

conf.subject = 7 # 1: all > .35; 2: all >= .5, 3: >= .5 # 7: >= .58
conf.startup_n_full_maps = 10
#conf.cslip_forceZeroRef = False

mi.run_nbcells(nbfile, ['0.1','1']) #load data

frames = arange(ws1.dataset_full.all_IC_r.shape[0])


_saveables                 list (10)        list of types that can be stored
cslip_forceZeroRef         bool  True       reference values for controlled SLIP maps must be zero or not
dt_medfilter               bool  False      
dt_window                  int  30          
exclude_IC_from_factors    bool  False      
n_factors                  int  5           how many (optimal) factors to select of the full kinematic state
normalize_m                bool  True       
po_average_over_IC         bool  True       average over IC's and T,ymin (alt: parameters) for reference SLIP
quiet                      bool  False      
select_ankle_SLIP          bool  True       
startup_compute_PCA        bool  False      
startup_compute_full_maps  bool  True       
startup_n_full_maps        int  20          
subject                    int  2           
ttype                      int  1           
SlipData        list (6)         SlipData for s:7, t:1
_saveables      list (10)        list of types that can be stored
k               <class 'mutils.io.KinData'> KinData object: s:7, t:1, selection: ankles
len of 'ws1.full_markers': 48  (all: 84)

 running extended SLIP - EXP consistency check
==============================================
right steps:
  difference (in stds): +0.000, -0.000, -0.000
  difference (in stds): -0.000, +0.000, -0.000
  difference (in stds): +0.000, -0.000, +0.000
  difference (in stds): -0.000, -0.000, +0.000
  difference (in stds): -0.000, -0.000, -0.000
  difference (in stds): +0.000, -0.000, -0.000
  difference (in stds): +0.000, -0.000, +0.000
  difference (in stds): +0.000, -0.000, +0.000
  difference (in stds): -0.000, -0.000, +0.000
  difference (in stds): +0.000, -0.000, +0.000
left steps:
  difference (in stds): +0.000, -0.000, +0.000
  difference (in stds): -0.000, -0.000, -0.000
  difference (in stds): +0.000, -0.000, +0.000
  difference (in stds): -0.000, -0.000, +0.000
  difference (in stds): -0.000, -0.000, +0.000
  difference (in stds): +0.000, -0.000, +0.000
  difference (in stds): +0.000, -0.000, +0.000
  difference (in stds): +0.000, -0.000, +0.000
  difference (in stds): -0.000, -0.000, +0.000
  difference (in stds): -0.000, -0.000, -0.000

In [3]:
#cell_ID prepareData

ws1.dataset_full.all_kin_r = fda.dt_movingavg(ws1.dataset_full.all_kin_r[frames,:], 
                                              conf.dt_window, conf.dt_medfilter)
ws1.dataset_full.all_kin_l = fda.dt_movingavg(ws1.dataset_full.all_kin_l[frames,:], 
                                              conf.dt_window, conf.dt_medfilter)

ws1.dataset_full.all_IC_rc = fda.dt_movingavg(ws1.dataset_full.all_IC_rc[frames,:],
                                             conf.dt_window, conf.dt_medfilter)
ws1.dataset_full.all_IC_lc = fda.dt_movingavg(ws1.dataset_full.all_IC_lc[frames,:],
                                             conf.dt_window, conf.dt_medfilter)

ws1.dataset_full.all_param_r = fda.dt_movingavg(ws1.dataset_full.all_param_r[frames,:],
                                                conf.dt_window, conf.dt_medfilter)
ws1.dataset_full.all_param_l = fda.dt_movingavg(ws1.dataset_full.all_param_l[frames,:], 
                                                conf.dt_window, conf.dt_medfilter)

In [4]:
#cell_ID computePredictions

# --- make predictions: 



# --- right step
import mutils.statistics as st
idat1 = ws1.dataset_full.all_kin_r[1:, :]
idat2 = hstack([ws1.dataset_full.all_kin_r, roll(ws1.dataset_full.all_kin_l, 1, axis=0)])[1:, :]
idat3 = hstack([ws1.dataset_full.all_kin_r[1:, :], ws1.dataset_full.all_kin_r[:-1, :]])

odat1 = ws1.dataset_full.all_param_r[1:, :]
odat2 = ws1.dataset_full.all_IC_lc[1:, :]

sys.stdout.write("computing right step")
sys.stdout.flush()
rrv1_1 = st.predTest(idat1, odat1, nboot=100)
rrv1_2 = st.predTest(idat2, odat1, nboot=100)
rrv1_3 = st.predTest(idat3, odat1, nboot=100)

rrv2_1 = st.predTest(idat1, odat2, nboot=100)
rrv2_2 = st.predTest(idat2, odat2, nboot=100)
rrv2_3 = st.predTest(idat3, odat2, nboot=100)
print " done"

# --- right stride
sys.stdout.write("computing right stride")
sys.stdout.flush()

idat1 = ws1.dataset_full.all_kin_r[1:-1, :]
idat2 = hstack([ws1.dataset_full.all_kin_r, roll(ws1.dataset_full.all_kin_l, 1, axis=0)])[1:-1, :]
idat3 = hstack([ws1.dataset_full.all_kin_r[:-2, :], ws1.dataset_full.all_kin_r[1:-1, :]])

odat1 = ws1.dataset_full.all_param_l[1:-1, :]
odat2 = ws1.dataset_full.all_IC_rc[2:, :]

rrv3_1 = st.predTest(idat1, odat1, nboot=100)
rrv3_2 = st.predTest(idat2, odat1, nboot=100)
rrv3_3 = st.predTest(idat3, odat1, nboot=100)

rrv4_1 = st.predTest(idat1, odat2, nboot=100)
rrv4_2 = st.predTest(idat2, odat2, nboot=100)
rrv4_3 = st.predTest(idat3, odat2, nboot=100)
print " done"

# --- left step
import mutils.statistics as st
idat1 = ws1.dataset_full.all_kin_l[1:-1, :]
idat2 = hstack([ws1.dataset_full.all_kin_l, ws1.dataset_full.all_kin_r])[1:-1, :]
idat3 = hstack([ws1.dataset_full.all_kin_l[1:-1, :], ws1.dataset_full.all_kin_l[:-2, :]])

odat1 = ws1.dataset_full.all_param_l[2:, :]
odat2 = ws1.dataset_full.all_IC_rc[2:, :]

sys.stdout.write("computing left step")
sys.stdout.flush()
rrv1_1l = st.predTest(idat1, odat1, nboot=100)
rrv1_2l = st.predTest(idat2, odat1, nboot=100)
rrv1_3l = st.predTest(idat3, odat1, nboot=100)

rrv2_1l = st.predTest(idat1, odat2, nboot=100)
rrv2_2l = st.predTest(idat2, odat2, nboot=100)
rrv2_3l = st.predTest(idat3, odat2, nboot=100)
print " done"

# --- left stride
sys.stdout.write("computing left stride")
sys.stdout.flush()

idat1 = ws1.dataset_full.all_kin_l[1:-1, :]
idat2 = hstack([ws1.dataset_full.all_kin_l, ws1.dataset_full.all_kin_r])[1:-1, :]
idat3 = hstack([ws1.dataset_full.all_kin_l[:-2, :], ws1.dataset_full.all_kin_l[1:-1, :]])

odat1 = ws1.dataset_full.all_param_r[2:, :]
odat2 = ws1.dataset_full.all_IC_lc[2:, :]

rrv3_1l = st.predTest(idat1, odat1, nboot=100)
rrv3_2l = st.predTest(idat2, odat1, nboot=100)
rrv3_3l = st.predTest(idat3, odat1, nboot=100)

rrv4_1l = st.predTest(idat1, odat2, nboot=100)
rrv4_2l = st.predTest(idat2, odat2, nboot=100)
rrv4_3l = st.predTest(idat3, odat2, nboot=100)
print " done"


computing right step done
computing right stride done
computing left step done
computing left stride done

In [31]:
close('all')

In [9]:
# cell_ID prepareFigure
import libshai.util as ut
reload(ut)
import matplotlib.gridspec as gridspec

import matplotlib.font_manager as fmt
FM = fmt.FontManager()
#fnt = FM.findfont('Times New Roman') # for Proc R Soc
fnt = FM.findfont('Arial') # for NComm
fp = fmt.FontProperties(fname=fnt)
fp.set_size(9.0)


gs1 = gridspec.GridSpec(1, 1, wspace=.02, left=.125, right=.975)

#figure(figsize=(6.83, 3)) # ProcRSoc
#figure(figsize=(7.03, 3)) # NComm, twocolumn
fig = figure(figsize=(3.4, 3)) # NComm, single
#ax0 = subplot(1,1,1,frameon=False)

rasterize = False # true: better for print (?)
fmt = {'marker' : ',', 'ls' : '', 'color' : '#aa1122' , 'markersize':4, 
       'markeredgecolor' : '#aa1122', 'rasterized': rasterize}
fmt2 = {'marker' : ',', 'ls' : '', 'color' : '#9898ff' , 'markersize':4, 
        'markeredgecolor' : '#9898ff', 'rasterized' : rasterize}

#ffmt = {'name' : "Times New Roman", 'size' : 9}
#ffmt2 = {'family' : 'serif',
#         'serif' : ['Times', 'Times New Roman'],
#         'size' : 9}
#matplotlib.rc('font', **ffmt2)
 


ax1 = subplot(gs1[0,0])



In [10]:
# cell_ID addToAxes
axes(ax1)
# model 1: 1 step delay
for k in xrange(rrv1_1l.shape[1]):
    ut.qqplot(rrv1_1l[:,k],rrv1_2l[:,k], **fmt)
for k in xrange(rrv2_1l.shape[1]):
    ut.qqplot(rrv2_1l[:,k],rrv2_2l[:,k], **fmt)
for k in xrange(rrv3_1l.shape[1]):
    ut.qqplot(rrv3_1l[:,k],rrv3_2l[:,k], **fmt)
for k in xrange(rrv4_1l.shape[1]):
    ut.qqplot(rrv4_1l[:,k],rrv4_2l[:,k], **fmt)

for k in xrange(rrv1_1.shape[1]):
    ut.qqplot(rrv1_1[:,k],rrv1_2[:,k], **fmt)
for k in xrange(rrv2_1.shape[1]):
    ut.qqplot(rrv2_1[:,k],rrv2_2[:,k], **fmt)    
for k in xrange(rrv3_1.shape[1]):
    ut.qqplot(rrv3_1[:,k],rrv3_2[:,k], **fmt)
for k in xrange(rrv4_1.shape[1]):
    ut.qqplot(rrv4_1[:,k],rrv4_2[:,k], **fmt)
    
    
    
# model 2: 1 stride delay
for k in xrange(rrv1_1l.shape[1]):
    ut.qqplot(rrv1_1l[:,k],rrv1_3l[:,k], **fmt2)
for k in xrange(rrv2_1l.shape[1]):
    ut.qqplot(rrv2_1l[:,k],rrv2_3l[:,k], **fmt2)
for k in xrange(rrv3_1l.shape[1]):
    ut.qqplot(rrv3_1l[:,k],rrv3_3l[:,k], **fmt2)
for k in xrange(rrv4_1l.shape[1]):
    ut.qqplot(rrv4_1l[:,k],rrv4_3l[:,k], **fmt2)

for k in xrange(rrv1_1.shape[1]):
    ut.qqplot(rrv1_1[:,k],rrv1_3[:,k], **fmt2)
for k in xrange(rrv2_1.shape[1]):
    ut.qqplot(rrv2_1[:,k],rrv2_3[:,k], **fmt2)
for k in xrange(rrv3_1.shape[1]):
    ut.qqplot(rrv3_1[:,k],rrv3_3[:,k], **fmt2)
for k in xrange(rrv4_1.shape[1]):
    ut.qqplot(rrv4_1[:,k],rrv4_3[:,k], **fmt2)



In [11]:
# cell_ID finalizeFigure
#plot([0,2],[0,2],'-',linewidth=1.5, color='k')
axes(ax1)
gca().set_yticks(arange(8)*.2)
gca().set_yticklabels(arange(8)*.2, fontproperties=fp)

gca().set_xticks(arange(8)*.2)
gca().set_xticklabels(arange(8)*.2, fontproperties=fp)

gca().set_xlim([0, 1.5])
gca().set_ylim([0, 1.5])


grid(1)

#ax2 = subplot(gs1[0,1])


#title('one stride delay', fontproperties=fp)
plot([0,2],[0,2],'-',linewidth=1, color='k')
gca().set_xlim([0, 1.5])
gca().set_ylim([0, 1.5])
#gca().set_yticklabels([])

gca().set_xticks(arange(8)*.2)
gca().set_xticklabels(arange(8)*.2, fontproperties=fp)



grid(1)

# annotation arrows
ax1.arrow(.57, .57, -.15, .15, head_width=0.02, head_length=0.04, fc='k', ec='k')
ax1.text(.4, .8, 'enlarged model is\n more predictive', fontproperties=fp, ha='center', va='bottom',
         bbox=dict(facecolor='#ffffff', edgecolor='None', pad=5.0))

ax1.arrow(.57, .57, .15, -.15, head_width=0.02, head_length=0.04, fc='k', ec='k')
ax1.text(.77, .3, 'reference model is\n more predictive', fontproperties=fp, ha='center', va='top',
         bbox=dict(facecolor='#ffffff', edgecolor='None', pad=5.0))

# legend axis
axL = axes([.15,.75,.45,.125])
axL.set_xticks([])
axL.set_yticks([])
axL.text(0, 1, 'one step delay', fontproperties=fp, va='center')
axL.text(0, 0, 'one stride delay', fontproperties=fp, va='center')
axL.plot([-.1], [1], '.', color=fmt['color'])
axL.plot([-.1], [0], '.', color=fmt2['color'])
axL.set_xlim(-.2,1.2)
axL.set_ylim(-.5,1.5)

ax1.set_ylabel('rrv of full state model (reference)', fontproperties=fp)

# common frame for label
#ax0.plot(randn(1000))
#ax0 = axes([.1,.075,.8,.8], frameon=False)
#ax0.set_xticks([])
#ax0.set_yticks([])
#ax0.set_xlabel('corresponding quantile of enlarged model', fontproperties=fp)
ax1.set_xlabel('rrv of enlarged models', fontproperties=fp)
ax1.set_title('Q-Q plots of relative remaining variance', fontproperties=fp)

#ax1.set_rasterized(False)

savefig('img/qq_delaymdodels_all.pdf', dpi=60)
savefig('img/qq_delaymdodels_all.svg', dpi=60)
pass



In [6]:
savefig('img/qq_delaymdodels_all.pdf')
savefig('img/qq_delaymdodels_all.svg')

Step 2: compute analytical (statistical) expression

The idea is: "A bootstrapped distribution is an estimation of the probability of the true value beeing that value". We now compute the probability of "A(bootstrapped)" > "B(bootstrapped)", which is basically: "How large is the probability that a random sample, drawn from A, is larger than a random sample, drawn from B?"

This is the chance that A(true value) is larger than B(true value)


In [125]:
# do these histograms "statistically overlap?"
figure()
hist(rrv4_1l[:,1], alpha=.5)
hist(rrv4_2l[:,1], alpha=.5)


Out[125]:
(array([ 6,  7, 21, 14, 23, 17,  5,  5,  1,  1]),
 array([ 0.73518366,  0.76097786,  0.78677206,  0.81256626,  0.83836046,
        0.86415466,  0.88994887,  0.91574307,  0.94153727,  0.96733147,
        0.99312567]),
 <a list of 10 Patch objects>)

In [82]:
17.5 / 2.56


Out[82]:
6.8359375

In [ ]:
# define test functions

from numpy import histogram, cumsum

def get_cpdf(data, bins=100):
    """
    returns the empirical cummulative probability density function.
    Code from Stackoverflow (thanks to alex-i)
    
    :args:
        data (1d array): empirical data
        bins (int): number of bins
    
    :returns:
        x, y (1d arrays): the empirical pdf (cummulative)
        
    """
    counts, bins = numpy.histogram(data, bins=bins, density=True)
    cum_counts = numpy.cumsum(counts)
    bin_widths = (bins[1:] - bins[:-1]) # bin_widths has the function of "dx" in the integral
    y = hstack([0, cum_counts*bin_widths])
    x = bins
    return x,y




def p_sampledst(dst1, dst2):
    """
    computes the probability that a random sample drawn from the (empirical) distribution dst1
    is larger than a random sample drawn from the (empirical) distribution dst2
    
    :args:
        dst1 (1d array): empirical distribution to test
        dst2 (1d array): empirical reference distribution
        
    :returns:
        p (float): probability of dst1 > dst2
    """
   
    start = min(min(dst1),min(dst2))
    end = max(max(dst1),max(dst2))
    delta = end - start
    start -= .1*delta
    end += .1*delta
    xi = linspace(start,end,10000) # just to have regular spacing

    cpdf_ref_x, cpdf_ref_y = get_cpdf(dst2) # reference distribution cpdf
    cpdf_test_x, cpdf_test_y = get_cpdf(dst1) # test distribution cpdf

    # interpolate for regular spacing
    cri = interp(xi, cpdf_ref_x, cpdf_ref_y, right=1, left=0)
    cti = interp(xi, cpdf_test_x, cpdf_test_y, right=1, left=0)

    pti = gradient(cti) # cpdf -> pdf

    # compute P(test > ref) = integral( p(test=x)*p(ref > x) dx)
    p_tot = 0.

    for px, py in zip(pti, cri):
        p_tot += px * py
        
    return p_tot

In [ ]:
print "p for 'Is a memory based linear model better?'"
print "results for subject {}\n".format(conf.subject)

print "Right step:"
print "=" * 12
print "  SLIP parameter:"
print "  " + "=" * 15
for dim in range(5):
    print "  param: {} : test1: {:1.5f}, test2: {:1.5f}".format(dim + 1,
                    p_sampledst(rrv1_2[:, dim], rrv1_1[:, dim]), p_sampledst(rrv1_3[:, dim], rrv1_1[:, dim]))

print "  Apex states:"    
print "  " + "=" * 11
for dim in range(3):
    print "  param: {} : test1: {:1.5f}, test2: {:1.5f}".format(dim + 1,
                    p_sampledst(rrv2_2[:, dim], rrv2_1[:, dim]), p_sampledst(rrv2_3[:, dim], rrv2_1[:, dim]))

print "Right stride:"
print "=" * 12
print "  SLIP parameter:"
print "  " + "=" * 15
for dim in range(5):
    print "  param: {} : test1: {:1.5f}, test2: {:1.5f}".format(dim + 1,
                    p_sampledst(rrv3_2[:, dim], rrv3_1[:, dim]), p_sampledst(rrv3_3[:, dim], rrv3_1[:, dim]))

print "  Apex states:"    
print "  " + "=" * 11
for dim in range(3):
    print "  param: {} : test1: {:1.5f}, test2: {:1.5f}".format(dim + 1,
                    p_sampledst(rrv4_2[:, dim], rrv4_1[:, dim]), p_sampledst(rrv4_3[:, dim], rrv4_1[:, dim]))
    
print "Left step:"
print "=" * 12
print "  SLIP parameter:"
print "  " + "=" * 15
for dim in range(5):
    print "  param: {} : test1: {:1.5f}, test2: {:1.5f}".format(dim + 1,
                    p_sampledst(rrv1_2l[:, dim], rrv1_1l[:, dim]), p_sampledst(rrv1_3l[:, dim], rrv1_1l[:, dim]))

print "  Apex states:"    
print "  " + "=" * 11
for dim in range(3):
    print "  param: {} : test1: {:1.5f}, test2: {:1.5f}".format(dim + 1,
                    p_sampledst(rrv2_2l[:, dim], rrv2_1l[:, dim]), p_sampledst(rrv2_3l[:, dim], rrv2_1l[:, dim]))

print "Left stride:"
print "=" * 12
print "  SLIP parameter:"
print "  " + "=" * 15
for dim in range(5):
    print "  param: {} : test1: {:1.5f}, test2: {:1.5f}".format(dim + 1,
                    p_sampledst(rrv3_2l[:, dim], rrv3_1l[:, dim]), p_sampledst(rrv3_3l[:, dim], rrv3_1l[:, dim]))

print "  Apex states:"    
print "  " + "=" * 11
for dim in range(3):
    print "  param: {} : test1: {:1.5f}, test2: {:1.5f}".format(dim + 1,
                    p_sampledst(rrv4_2l[:, dim], rrv4_1l[:, dim]), p_sampledst(rrv4_3l[:, dim], rrv4_1l[:, dim]))

In [ ]:
rrv1_2.shape

In [ ]:
figure()
plot(ws1.dataset_full.s_param_l[950:1000,:],'.')

In [ ]:
figure()
plot(ws1.dataset_full.all_IC_l[:,0]/ std(ws1.dataset_full.all_IC_l[:,0]), '.')
plot(ws1.dataset_full.all_IC_r[:,0] / std(ws1.dataset_full.all_IC_r[:,0]), '.')

In [ ]: